LDS Data Manipulation for Spatial Exante Analytics
Author
Maxwell Mkondiwa
1 Introduction
In this workbook, we show the data manipulation steps for the LCAS data used in spatial ex-ante analytics. The data manipulation steps include: (a) constructing variables, (b) combining the LCAS with geo-variables, e.g., soil grids, and (c) combining the LCAS with climate variables. We then show an interactive table of the merged data. Finally, we use the data as inputs in subsequent spatial ex-ante workflows.
We first clear the workspace, load all the packages, and import the data from dataverse. The data is on the CIMMYT CSISA dataverse: https://data.cimmyt.org/dataset.xhtml?persistentId=hdl:11529/10548507. To download the data, we use the "agro" R package.
# Conversions --------------------------------------------------------------
LDS$C.q306_cropLarestAreaHA <- LDS$C.q306_cropLarestAreaAcre * 0.405  # acres to hectares
LDS$yield_kgperha <- LDS$L.tonPerHectare * 1000                       # t/ha to kg/ha
# Assumes the reported farm-gate price is per 100 kg (quintal) — TODO confirm unit.
LDS$L.q607_farmGatePricePerKg <- LDS$L.q607_farmGatePrice / 100

# Nutrient fractions of NPK grades ------------------------------------------
# Grade labels encode N-P2O5-K2O percentages (e.g. "12_32_16" = 12% N,
# 32% P2O5, 16% K2O). Grades not listed here resolve to NA and drop out of
# the rowSums(..., na.rm = TRUE) nutrient totals below.
npk_grades <- c("10_26_26", "12_32_16", "14_35_14",
                "Other20-13-13", "Other20-20-0-13", "Other20-20-13")
npk_n <- c(0.10, 0.12, 0.14, 0.20, 0.20, 0.20)
npk_p <- c(0.26, 0.32, 0.35, 0.13, 0.20, 0.20)
# FIX: "Other20-20-0-13" (20-20-0-13, i.e. 0% K2O) previously had no K
# assignment at all, leaving NA; it is now explicitly 0.
npk_k <- c(0.26, 0.16, 0.14, 0.13, 0.00, 0.13)
npk_idx <- match(LDS$F.q51071_gradeNPK, npk_grades)
LDS$F.q51071_gradeNPKN <- npk_n[npk_idx]
LDS$F.q51071_gradeNPKP <- npk_p[npk_idx]
LDS$F.q51071_gradeNPKK <- npk_k[npk_idx]

# Nutrient fractions of NPKS grades -----------------------------------------
# Grade labels encode N-P2O5-K2O-S percentages, so "16_20_0_13" is
# 16% N, 20% P2O5, 0% K2O, 13% S.
# FIX: the original assigned P = 0.16 and K = 0.16 for "16_20_0_13" and
# K = 0.20 for "20_20_0_13" (copy-paste of the N fraction); the fractions
# implied by the grade strings are used here instead.
npks_grades <- c("16_20_0_13", "20_20_0_13")
npks_idx <- match(LDS$F.q51211_gradeNPKS, npks_grades)
LDS$F.q51211_gradeNPKSN <- c(0.16, 0.20)[npks_idx]
LDS$F.q51211_gradeNPKSP <- c(0.20, 0.20)[npks_idx]
LDS$F.q51211_gradeNPKSK <- c(0.00, 0.00)[npks_idx]
LDS$F.q51211_gradeNPKSS <- c(0.13, 0.13)[npks_idx]

# Nutrient content -----------------------------------------------------------
# Product nutrient shares taken from Cedrez, Chamberlin, Guo and Hijmans, p. 3.

### N ---------------------------------------------------------------------
LDS$F.totAmtDAPN  <- LDS$F.totAmtDAP * 0.18
LDS$F.totAmtUreaN <- LDS$F.totAmtUrea * 0.46
LDS$F.totAmtNPKN  <- LDS$F.totAmtNPK * LDS$F.q51071_gradeNPKN
LDS$F.totAmtTSPN  <- LDS$F.totAmtTSP * 0   # TSP carries no N
LDS$F.totAmtSSPN  <- LDS$F.totAmtSSP * 0   # SSP carries no N
LDS$F.totAmtNPKSN <- LDS$F.totAmtNPKS * LDS$F.q51211_gradeNPKSN
LDS$N <- rowSums(LDS[, c("F.totAmtDAPN", "F.totAmtUreaN", "F.totAmtNPKN",
                         "F.totAmtTSPN", "F.totAmtSSPN", "F.totAmtNPKSN")],
                 na.rm = TRUE)
LDS$Nperha <- LDS$N / LDS$C.q306_cropLarestAreaHA
LDS$NperhaSq <- LDS$Nperha * LDS$Nperha

### P (P2O5 basis) ---------------------------------------------------------
LDS$F.totAmtDAPP  <- LDS$F.totAmtDAP * 0.46
LDS$F.totAmtUreaP <- LDS$F.totAmtUrea * 0  # urea carries no P
LDS$F.totAmtNPKP  <- LDS$F.totAmtNPK * LDS$F.q51071_gradeNPKP
LDS$F.totAmtTSPP  <- LDS$F.totAmtTSP * 0.45
LDS$F.totAmtSSPP  <- LDS$F.totAmtSSP * 0.2
LDS$F.totAmtNPKSP <- LDS$F.totAmtNPKS * LDS$F.q51211_gradeNPKSP
LDS$P2O5 <- rowSums(LDS[, c("F.totAmtDAPP", "F.totAmtUreaP", "F.totAmtNPKP",
                            "F.totAmtTSPP", "F.totAmtSSPP", "F.totAmtNPKSP")],
                    na.rm = TRUE)
LDS$P2O5perha <- LDS$P2O5 / LDS$C.q306_cropLarestAreaHA
# Create dummy variables ----------------------------------------------------
# Female-headed indicator: 1 = female, 0 = male (other responses stay NA).
LDS$A.q111_fGenderdum[LDS$A.q111_fGender == "female"] <- 1
LDS$A.q111_fGenderdum[LDS$A.q111_fGender == "male"] <- 0

# Attach the variety maturity classification, keeping all survey rows.
varieties <- read.csv("LDS wheat variety maturity class.csv")
LDS <- merge(LDS, varieties, by = "D.q410_varName", all.x = TRUE)

# Normal (NMWV) vs early (EMWV) maturing wheat variety dummy.
LDS$variety_type_NMWV[LDS$variety_type == "NMWV"] <- 1
LDS$variety_type_NMWV[LDS$variety_type == "EMWV"] <- 0
LDS$variety_type_NMWV <- as.numeric(LDS$variety_type_NMWV)

# Sowing time ---------------------------------------------------------------
# Split the sowing date into day / month / year components.
LDS$Sowdate <- LDS$D.seedingSowingTransplanting
library(tidyr)
LDS <- LDS %>%
  separate(Sowdate, c("Sday", "Smonth", "Syear"))
table(LDS$Sday)
The survey data contains approximate GPS locations of the plots. We can use these to extract soil and climate variables that are then included in crop response function.
# Function to add geo-variables ---------------------------------------------
library(sf)
library(sp)
library(rgdal)
library(terra)
library(raster)
library(geodata)

# Run once: extracts population, elevation, and SoilGrids layers at the plot
# GPS points and caches the result to geovariables.csv.
# add_secondary_lcas <- function(df) {
#   # Remove NAs (and optionally duplicates) in geo-coordinates
#   # df <- subset(df, !(duplicated(df$O.largestPlotGPS.Longitude)))
#   # df <- subset(df, !(duplicated(df$O.largestPlotGPS.Latitude)))
#   df <- subset(df, !(is.na(df$O.largestPlotGPS.Longitude)))
#   df <- subset(df, !(is.na(df$O.largestPlotGPS.Latitude)))
#   df_sp <- SpatialPointsDataFrame(
#     cbind(df$O.largestPlotGPS.Longitude, df$O.largestPlotGPS.Latitude),
#     data = df, proj4string = CRS("+proj=longlat +datum=WGS84"))
#   df_sf <- st_as_sf(df_sp)
#
#   population <- population(2020, 05, path = tempdir())
#   population_geodata <- terra::extract(population, vect(df_sf), fun = mean, df = TRUE)
#   elevationglobal_geodata <- elevation_global(0.5, path = tempdir())
#   elevation_geodata <- terra::extract(elevationglobal_geodata, vect(df_sf), fun = mean, df = TRUE)
#   Soilsand <- soil_world("sand", depth = 5, path = tempdir())
#   Soilsand_lds <- terra::extract(Soilsand, vect(df_sf), fun = mean, df = TRUE)
#   Totalnitrogen <- soil_world("nitrogen", depth = 5, path = tempdir())
#   Totalnitrogen_lds <- terra::extract(Totalnitrogen, vect(df_sf), fun = mean, df = TRUE)
#   soilsoc <- soil_world("soc", depth = 15, path = tempdir())
#   soilsoc_lds <- terra::extract(soilsoc, vect(df_sf), fun = mean, df = TRUE)
#
#   # Merge all soils and population
#   geodata_df <- list(population_geodata, elevation_geodata, Soilsand_lds,
#                      Totalnitrogen_lds, soilsoc_lds)
#   geodata_df <- Reduce(function(x, y) merge(x, y, all = TRUE), geodata_df)
#   write.csv(geodata_df, paste0("geovariables", ".csv"))
# }
# add_secondary_lcas(LDS)

# Load the cached extraction and attach it to the survey data.
# NOTE(review): cbind assumes geovariables.csv has the same row count and
# order as LDS (the run-once function drops rows with NA coordinates) —
# verify alignment, or merge on an explicit ID instead.
library(rio)
geovariables <- import("geovariables.csv")
LDS <- cbind(LDS, geovariables)
4 Climate variables
The geodata R package has aggregated rainfall and temperature variables. However, we need climate variables specific to the corresponding growing season.
library(ncdf4)
library(raster)
library(terra)
library(sf)
library(data.table)
library(exactextractr)

# RUN ONCE: aggregates monthly UDel temperature/precipitation NetCDFs to
# annual values, extracts them at the plot GPS points, and caches the
# results to CSV.
# add_temp_precip_lcas <- function(df) {
#   # Remove NAs (and optionally duplicates) in geo-coordinates
#   # df <- subset(df, !(duplicated(df$O.largestPlotGPS.Longitude)))
#   # df <- subset(df, !(duplicated(df$O.largestPlotGPS.Latitude)))
#   df <- subset(df, !(is.na(df$O.largestPlotGPS.Longitude)))
#   df <- subset(df, !(is.na(df$O.largestPlotGPS.Latitude)))
#   # NOTE(review): the original referenced df$OlargestPlotGPS.Latitude
#   # (missing dot) for the y-coordinate; confirm before re-running.
#   df_sp <- SpatialPointsDataFrame(
#     cbind(df$O.largestPlotGPS.Longitude, df$O.largestPlotGPS.Latitude),
#     data = df, proj4string = CRS("+proj=longlat +datum=WGS84"))
#   df_sf <- st_as_sf(df_sp)
#
#   version <- "501"
#   start.yr <- 1960
#   num.yrs <- ifelse(version == "501", (2017 - start.yr + 1), (2010 - start.yr + 1))
#   udel.temp.filename <- paste0("air.mon.mean.v", version, ".nc")
#   udel.precip.filename <- paste0("precip.mon.total.v", version, ".nc")
#   # Output locations to write results to
#   out.filename <- paste0("UDel.aggregated.public.v", version, ".csv")
#   out.filename2017 <- paste0("UDel.aggregated2017.public.v", version, ".csv")
#   yr.offset <- start.yr - 1900
#   temps <- subset(brick(udel.temp.filename), (yr.offset * 12 + 1):(12 * (yr.offset + num.yrs)))
#   precip <- subset(brick(udel.precip.filename), (yr.offset * 12 + 1):(12 * (yr.offset + num.yrs)))
#
#   # 1. Aggregate across months within a year: mean for temp, sum for precip
#   annual.temps <- stackApply(temps, indices = rep(1:num.yrs, each = 12), fun = mean)
#   annual.precip <- stackApply(precip, indices = rep(1:num.yrs, each = 12), fun = sum)
#
#   # 2. Aggregate spatially
#   annual.temps <- rotate(annual.temps)
#   annual.precip <- rotate(annual.precip)
#   df_sf$idmatching <- 1:nrow(df_sf)
#
#   # Aggregate temperatures
#   ctry.temps <- rbindlist(lapply(1:num.yrs, FUN = function(yr) {
#     ctry.temps <- extract(annual.temps[[yr]], df_sf)
#     # Create data.table of results for this year, including the year
#     return(data.table(hhid = df_sf$idmatching, temp = ctry.temps, yr = yr - 1 + start.yr))
#   }))
#
#   # Aggregate precipitation. Note the multiplication by 10: the UDel data
#   # is in cm/year, but Burke et al use mm/year.
#   ctry.precip <- rbindlist(lapply(1:num.yrs, FUN = function(yr) {
#     cropped.precip <- annual.precip[[yr]] * 10
#     ctry.precip <- extract(cropped.precip, df_sf)
#     return(data.table(hhid = df_sf$idmatching, precip = ctry.precip, yr = yr - 1 + start.yr))
#   }))
#
#   # Combine these results and save
#   all.udel.data <- merge(ctry.temps, ctry.precip, by = c("hhid", "yr"))
#   all.udel.data_2017 <- subset(all.udel.data, all.udel.data$yr == "2017")
#   fwrite(all.udel.data, out.filename)
#   fwrite(all.udel.data_2017, out.filename2017)
# }
# add_temp_precip_lcas(LDS)

## Temperature and rainfall --------------------------------------------------
# Load the cached extractions: 2017 values plus a wide panel of 2007-2017.
tempprecip <- read.csv("UDel.aggregated2017.public.v501.csv")
tempprecipall <- read.csv("UDel.aggregated.public.v501.csv")
tempprecipallwide <- reshape(tempprecipall, direction = "wide",
                             idvar = "hhid", timevar = "yr")
# NOTE(review): the selection starts at precip.2007 with no temp.2007 —
# confirm whether that omission is intentional.
tempprecipallwide_small <- subset(tempprecipallwide, select = c(
  "precip.2007", "temp.2008", "precip.2008", "temp.2009", "precip.2009",
  "temp.2010", "precip.2010", "temp.2011", "precip.2011",
  "temp.2012", "precip.2012", "temp.2013", "precip.2013",
  "temp.2014", "precip.2014", "temp.2015", "precip.2015",
  "temp.2016", "precip.2016", "temp.2017", "precip.2017"))
# NOTE(review): cbind assumes row order/count alignment with LDS — verify,
# or merge on an explicit ID instead.
LDS <- cbind(LDS, tempprecip, tempprecipallwide_small)

# Interactive table of the data
library(reactable)
reactable(LDS)